
#### JURISDICTIONAL ANALYSIS (STATE-TO-STATE) ####
# Raw data for Tables 4, 5, A.1; Figure A.6
require(tigris)
require(tidycensus)
require(sp)
require(rgdal)
require(sf)
require(foreign) 

WORK = file.path('Z:/user/ajk41/Solar siting/Scripts/JAERE Code Repository/Data JAERE')
WORK.OUT = file.path(WORK, 'intermediate_output')




#### Data Prep ####

#--> Load damages
dzip_poll = readRDS(file.path(WORK.OUT, 'Damages','CEC_to_damages_by_POLL.rds'))
dzip_poll = dzip_poll[POLL=='CO2',c('total_damages',FIPSnames):=0]  #--> # This analysis drops all CO2. CO2 damages cannot be allocated to specific counties.

#--> Mappings for state codes, abbreviations, and names (for table output)
data(fips_codes)
STATE_to_state_name = fips_codes %>% dplyr::select(STATE = state, state_code, own_state_code = state_code, state_name) %>% distinct()

zipmap = st_as_sf(readRDS(file.path(WORK, 'FIPS Shapefiles', "USA_Zip_Code_Boundaries_v104_zip_poly.rds"))) %>% 
  st_set_geometry(NULL) %>%
  dplyr::select(zip = czip, STATE) %>%
  left_join(STATE_to_state_name, by='STATE') 


#---> Get the fields that are county FIPS:
FIPSnames = names(dzip_poll)[!is.na(as.numeric(names(dzip_poll)))]  #--> OK to have an error; that's what I want.
BASEnames = names(dzip_poll)[is.na(as.numeric(names(dzip_poll)))]

FIPSnames.split = paste0(substr(FIPSnames, start=1, stop=2), '-', substr(FIPSnames, start=3, stop=5))
names(dzip_poll)[!is.na(as.numeric(names(dzip_poll)))] = FIPSnames.split  #--> ok to have an error again.


#---> Caution - 374M row transformation!
xx = melt.data.table(dzip_poll, measure.vars = FIPSnames.split, value.name='annual.damages', variable.name = 'county.fips')
xx[,c('state_code', 'county_code'):=(tstrsplit(x=county.fips, split='-'))]




#### Figure A.6 In-State Shares of Avoided Local Pollution Benefits (raw data) ####
#--> Sum state-by-state impacts over each zip. Merge corresponding own-state, and calculate how much of total benefits accrue to own-state.
ben1 = xx[,j=list(annual.damages = sum(annual.damages, na.rm=T)), by = c('zip','state_code')]
ben1 = merge(ben1, zipmap, by='zip')[,is_own:=as.numeric(state_code==own_state_code)]

# For each zip in a state, compute total avoided damages and avoided damages in state. 
# Then average across zip codes the avoided damages in state to give the average *level* of in-state benefits. 
# Also, compute average ratio of in-state avoided damages to total avoided damages.
# Max level and max ratio (obtained by passing a "max" command to the zip-level avoided damages in state and zip-level ratios.
zstate.distribution = ben1[,j = list(annual.damages = sum(annual.damages)), by = c('own_state_code','zip','is_own')]
zstate.distribution = zstate.distribution[,j = list(pct_own_state = annual.damages[is_own==1]/sum(annual.damages),
                                                    annual_avoided_damages_instate = annual.damages[is_own==1],
                                                    annual_avoided_damages_outstate = annual.damages[is_own==0],
                                                    annual_avoided_damages_total = sum(annual.damages)), by = c('own_state_code','zip')]


saveRDS(zstate.distribution, file.path(WORK.OUT, paste0('ShareCaptured.rds')))  #--> For Figure A.6




#### Data prep for state-to-state mapping ####
panel_counts = fread(file.path(WORK, 'NumberSystems.csv')) %>% # 10132018
  dplyr::select(zipcode, num_systems) %>%
  dplyr::filter(zipcode!=0) %>%
  dplyr::mutate(zip = formatC(zipcode, format = 'd', digits=4, flag=0)) %>%
  dplyr::select(-zipcode)


#--> Every row in xx is a pollutant damage that occurs in a different state and county. We want to aggregate across counties to get a total state damage (by state) for each zip
state_to_state = xx[,j = list(annual_damages = sum(annual.damages)), by=c('zip', 'state_code')][,state:=STATE_to_state_name$STATE[match(state_code, STATE_to_state_name$own_state_code)]][order(zip, state_code),] 

#--> Wide: one column for each state damaged
state_to_state = merge(x = dcast(state_to_state, zip  ~ state, value.var='annual_damages'), 
                       y = panel_counts, 
                       by='zip', all.x=T, all.y=F)[is.na(num_systems), num_systems:=0]

#--> Retrieve state abbr. for each zip
state_to_state = merge(x = state_to_state, y = zipmap %>% dplyr::select(zip, STATE) %>% distinct(), by='zip', all.x=T, all.y=F)
setcolorder(state_to_state, neworder = c('zip','STATE','num_systems'))

state_cols = setdiff(names(state_to_state), c('zip','STATE','num_systems'))






#--> Each zip has a different set of avoided damages across states (columns)
#      To collapse zips (location of capacity) to state, we need to weight across zipcodes within each state.
#      We use installed capacity (num_systems) for each zipcode as weight so that each row is a typical panel in some state, 
#      and each column is the total state avoided damages from a typical panel in the focal (row) state.

num_systemss = state_to_state$num_systems
state_to_state_installed_capacity = copy(state_to_state)[,(state_cols):=lapply(.SD, function(x){ x = x*num_systemss }), .SDcols=(state_cols)][,j = lapply(.SD, sum), .SDcols=(c(state_cols, 'num_systems')), by = STATE][order(STATE),]

num_systemsss = state_to_state_installed_capacity$num_systems # summed by state number of systems
state_to_state_installed_capacity[,(state_cols):=lapply(.SD, function(x){ x = x/num_systemsss }), .SDcols=(state_cols)]

stsai.clean = as.data.table(state_to_state_installed_capacity)[,state_cols,with=F]
state_to_state_installed_capacity.clean = copy(state_to_state_installed_capacity)





#### Table 4 and Table A.1: Location of benefits from capacity in focal state ####
state_to_state_installed_capacity = copy(state_to_state_installed_capacity.clean)
stsai = copy(stsai.clean)

state_to_state_installed_capacity[,best_state:= names(stsai)[apply(stsai,MAR=1,which.max)]]
state_to_state_installed_capacity[,own_state_benefits:= diag(as.matrix(stsai))]
state_to_state_installed_capacity[,total_benefits:=rowSums(stsai)]
state_to_state_installed_capacity[,percent_in_state:=own_state_benefits/total_benefits]
diag(stsai)=0

state_to_state_installed_capacity[,out_of_state_benefits:=rowSums(stsai)]
state_to_state_installed_capacity[,best_other_state:= names(stsai)[apply(stsai,MAR=1,which.max)]]
state_to_state_installed_capacity[,best_other_state_benefits:=diag(as.matrix(stsai[,state_to_state_installed_capacity$best_other_state, with=F]))]
state_to_state_installed_capacity[,Ratio:=best_other_state_benefits/own_state_benefits]
setcolorder(state_to_state_installed_capacity, neworder = c('STATE','own_state_benefits','out_of_state_benefits','total_benefits','percent_in_state','best_state','best_other_state','best_other_state_benefits','Ratio'))
state_to_state_installed_capacity = state_to_state_installed_capacity[order(-total_benefits),]
state_to_state_installed_capacity = state_to_state_installed_capacity[STATE!='DC',]
rm(stsai)

state_to_state_installed_capacity[,c('STATE','best_other_state'):=list(STATE_to_state_name$state_name[match(STATE, STATE_to_state_name$STATE)],
                                                                       STATE_to_state_name$state_name[match(best_other_state, STATE_to_state_name$STATE)])]

state_to_state_installed_capacity[,c('own_state_benefits','out_of_state_benefits','percent_in_state',
                                     'best_other_state_benefits','Ratio'):=list(round(own_state_benefits,0),
                                                                                round(out_of_state_benefits, 0),
                                                                                round(100*percent_in_state, 0),
                                                                                round(best_other_state_benefits, 0),
                                                                                round(Ratio, 2))]

saveRDS(state_to_state_installed_capacity[order(-own_state_benefits),.(STATE, own_state_benefits, out_of_state_benefits, percent_in_state)], 
        file.path(WORK.OUT, 'Table4raw_state_to_state_installed_cap.rds'))

saveRDS(state_to_state_installed_capacity[order(-total_benefits),.(STATE, best_other_state, best_other_state_benefits, Ratio)], 
        file.path(WORK.OUT, 'TableA1raw_state_to_state_installed_cap.rds'))

rm(state_to_state_installed_capacity)
rm(stsai)






#### Table 5: Best place for focal state to place capacity ####
stsai = copy(stsai.clean)
state_to_state_installed_capacity = copy(state_to_state_installed_capacity.clean)


stsaip = t((stsai))
dimnames(stsaip)[[2]] = state_to_state_installed_capacity$STATE
stsaip = as.data.table(stsaip)
state_to_state_installed_capacity[,c(state_cols):=stsaip] # drop in stsai', which inverts avoided damages. Each row is now "all the places (states) that avert damages in the focal state"


state_to_state_installed_capacity[,best_state:= names(stsaip)[apply(stsaip,MAR=1,which.max)]]
state_to_state_installed_capacity[,best_state_benefits:=diag(as.matrix(stsaip[,state_to_state_installed_capacity$best_state, with=F]))]
state_to_state_installed_capacity[,own_state_benefits:= diag(as.matrix(stsaip))] # same for transformed data (diag same when taking stsai')
diag(stsaip)=0

state_to_state_installed_capacity[,best_other_state:= names(stsaip)[apply(stsaip,MAR=1,which.max)]]
state_to_state_installed_capacity[,best_other_state_benefits:=diag(as.matrix(stsaip[,state_to_state_installed_capacity$best_other_state, with=F]))]
state_to_state_installed_capacity[,pct_benefits_gain:=round((((best_state_benefits-own_state_benefits)/own_state_benefits))*100, 0)]
setcolorder(state_to_state_installed_capacity, neworder = c('STATE','best_state','pct_benefits_gain','own_state_benefits','best_state_benefits','best_other_state','best_other_state_benefits'))
state_to_state_installed_capacity = state_to_state_installed_capacity[order(-pct_benefits_gain),]

state_to_state_installed_capacity[,c('STATE','best_state'):=list(STATE_to_state_name$state_name[match(STATE, STATE_to_state_name$STATE)],
                                                                 STATE_to_state_name$state_name[match(best_state, STATE_to_state_name$STATE)])]

state_to_state_installed_capacity = state_to_state_installed_capacity[STATE!='DC',]


## Table 5 raw data
saveRDS(state_to_state_installed_capacity[,.(STATE, best_state, pct_benefits_gain)],  
        file.path(WORK.OUT, paste0('Table5raw_state_to_state_installed_cap.rds')))




### Next step: Assemble_results.R to merge everything to one zipmap shapefile, then generate plots


